home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
/* If the pattern is not a simple fixed pattern, then after preprocessing */
/* and generating the masks, the program comes here.  Four cases:         */
/* 1. The pattern is a simple regular expression and no errors are        */
/*    allowed: the matching is done here.                                  */
/* 2. The pattern is a simple regular expression and unit-cost errors     */
/*    are allowed: go to asearch().                                        */
/* 3. The pattern is a simple regular expression and the edit cost is     */
/*    not uniform: go to asearch1().                                       */
/* 4. The pattern is a full regular expression: go to re() if the         */
/*    pattern is short (M <= SHORTREG), else go to re1().                  */
/* Input parameters: old_D_pat: delimiter pattern; fd: input file         */
/* descriptor; M: size of the pattern; D: number of errors allowed.       */
-
#include "agrep.h"
#include "memory.h"
#include <string.h>
-
- extern int CurrentByteOffset;
- extern unsigned Init1, D_endpos, endposition, Init[], Mask[], Bit[];
- extern int LIMITOUTPUT, LIMITPERFILE;
- extern int DELIMITER, FILENAMEONLY, D_length, I, AND, REGEX, JUMP, INVERSE;
- extern char D_pattern[];
- extern int TRUNCATE, DD, S;
- extern char Progname[], CurrentFileName[];
- extern int num_of_matched, prev_num_of_matched;
- extern int agrep_initialfd;
- extern int EXITONERROR;
- extern int agrep_inlen;
- extern CHAR *agrep_inbuffer;
- extern int agrep_inpointer;
- extern CHAR *agrep_outbuffer;
- extern int agrep_outlen;
- extern int agrep_outpointer;
- extern FILE *agrep_finalfp;
- extern int errno;
-
- extern int NEW_FILE, POST_FILTER;
-
- /* bitap dispatches job */
-
- int
- bitap(old_D_pat, Pattern, fd, M, D)
- char old_D_pat[], *Pattern;
- int fd, M, D;
- {
- char c;
- register unsigned r1, r2, r3, CMask, i;
- register unsigned end, endpos, r_Init1;
- register unsigned D_Mask;
- int ResidueSize , FIRSTROUND, lasti, print_end, j, num_read;
- int k;
- CHAR *buffer;
-
- D_length = strlen(old_D_pat);
- for(i=0; i<D_length; i++) if(old_D_pat[i] == '^' || old_D_pat[i] == '$')
- old_D_pat[i] = '\n';
- if (REGEX) {
- if (D > 4) {
- fprintf(stderr, "%s: the maximum number of erorrs allowed for full regular expressions is 4\n", Progname);
- if (!EXITONERROR) {
- errno = AGREP_ERROR;
- return -1;
- }
- else exit(2);
- }
- if (M <= SHORTREG) {
- return re(fd, M, D); /* SUN: need to find a even point */
- }
- else {
- return re1(fd, M, D);
- }
- }
- if (D > 0 && JUMP == ON)
- {
- return asearch1(old_D_pat, fd, D);
- }
- if (D > 0)
- {
- return asearch(old_D_pat, fd, D);
- }
- if(I == 0) Init1 = (unsigned)037777777777;
-
- j=0;
-
- r_Init1 = Init1;
- r1 = r2 = r3 = Init[0];
- endpos = D_endpos;
-
- D_Mask = D_endpos;
- for(i=1 ; i<D_length; i++) D_Mask = (D_Mask << 1) | D_Mask;
- D_Mask = ~D_Mask;
- FIRSTROUND = ON;
-
- #if AGREP_POINTER
- if (fd != -1) {
- #endif /*AGREP_POINTER*/
- alloc_buf(fd, &buffer, Max_record+BlockSize+1);
- buffer[Max_record-1] = '\n';
- lasti = Max_record;
- while ((num_read = fill_buf(fd, buffer + Max_record, BlockSize)) > 0)
- {
- i=Max_record;
- end = Max_record + num_read;
- if(FIRSTROUND) {
- i = Max_record - 1 ;
-
- if(DELIMITER) {
- for(k=0; k<D_length; k++) {
- if(old_D_pat[k] != buffer[Max_record+k]) break;
- }
- if(k>=D_length) j--;
- }
-
- FIRSTROUND = OFF;
- }
- if(num_read < BlockSize) {
- strncpy(buffer+Max_record+num_read, old_D_pat, D_length);
- end = end + D_length;
- buffer[end] = '\0';
- }
-
- /* BITAP_PROCESS: the while-loop below */
- while (i < end)
- {
- c = buffer[i++];
- CurrentByteOffset ++;
- CMask = Mask[c];
- r1 = r_Init1 & r3;
- r2 = (( r3 >> 1 ) & CMask) | r1;
- if ( r2 & endpos ) {
- j++;
- if (DELIMITER) CurrentByteOffset -= D_length;
- else CurrentByteOffset -= 1;
- if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE )
- {
- if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
- num_of_matched++;
-
- if (agrep_finalfp != NULL)
- fprintf(agrep_finalfp, "%s\n", CurrentFileName);
- else {
- int outindex;
- for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
- (CurrentFileName[outindex] != '\0'); outindex++) {
- agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
- }
- if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
- OUTPUT_OVERFLOW;
- free_buf(fd, buffer);
- return -1;
- }
- else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
- agrep_outpointer += outindex;
- }
-
- free_buf(fd, buffer);
- NEW_FILE = OFF;
- return 0;
- }
-
- print_end = i - D_length - 1;
- if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
- if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
- if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
- ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
- free_buf(fd, buffer);
- return 0; /* done */
- }
- }
- lasti = i - D_length;
- TRUNCATE = OFF;
- r2 = r3 = r1 = Init[0];
- r1 = r_Init1 & r3;
- r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
- if (DELIMITER) CurrentByteOffset += 1*D_length;
- else CurrentByteOffset += 1*1;
- }
- c = buffer[i++];
- CurrentByteOffset ++;
- CMask = Mask[c];
- r1 = r_Init1 & r2;
- r3 = (( r2 >> 1 ) & CMask) | r1;
- if ( r3 & endpos ) {
- j++;
- if (DELIMITER) CurrentByteOffset -= D_length;
- else CurrentByteOffset -= 1;
- if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE )
- {
- if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
- num_of_matched++;
-
- if (agrep_finalfp != NULL)
- fprintf(agrep_finalfp, "%s\n", CurrentFileName);
- else {
- int outindex;
- for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
- (CurrentFileName[outindex] != '\0'); outindex++) {
- agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
- }
- if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
- OUTPUT_OVERFLOW;
- free_buf(fd, buffer);
- return -1;
- }
- else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
- agrep_outpointer += outindex;
- }
-
- free_buf(fd, buffer);
- NEW_FILE = OFF;
- return 0;
- }
-
- print_end = i - D_length - 1;
- if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
- if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
- if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
- ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
- free_buf(fd, buffer);
- return 0; /* done */
- }
- }
- lasti = i - D_length ;
- TRUNCATE = OFF;
- r2 = r3 = r1 = Init[0];
- r1 = r_Init1 & r2;
- r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
- if (DELIMITER) CurrentByteOffset += 1*D_length;
- else CurrentByteOffset += 1*1;
- }
- }
-
- ResidueSize = num_read + Max_record - lasti;
- if(ResidueSize > Max_record) {
- ResidueSize = Max_record;
- TRUNCATE = ON;
- }
- strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize);
- lasti = Max_record - ResidueSize;
- if(lasti < 0) {
- lasti = 1;
- }
- if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
- ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
- free_buf(fd, buffer);
- return 0; /* done */
- }
- }
- free_buf(fd, buffer);
- return 0;
- #if AGREP_POINTER
- }
- else {
- buffer = agrep_inbuffer;
- num_read = agrep_inlen;
- end = num_read;
- /* buffer[end-1] = '\n';*/ /* at end of the text. */
- /* buffer[0] = '\n';*/ /* in front of the text. */
- i = 0;
- lasti = 1;
-
- if(DELIMITER) {
- for(k=0; k<D_length; k++) {
- if(old_D_pat[k] != buffer[k]) break;
- }
- if(k>=D_length) j--;
- }
-
- /* An exact copy of the above: BITAP_PROCESS: the while-loop below */
- while (i < end)
- {
- c = buffer[i++];
- CurrentByteOffset ++;
- CMask = Mask[c];
- r1 = r_Init1 & r3;
- r2 = (( r3 >> 1 ) & CMask) | r1;
- if ( r2 & endpos ) {
- j++;
- if (DELIMITER) CurrentByteOffset -= D_length;
- else CurrentByteOffset -= 1;
- if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE )
- {
- if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
- num_of_matched++;
-
- if (agrep_finalfp != NULL)
- fprintf(agrep_finalfp, "%s\n", CurrentFileName);
- else {
- int outindex;
- for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
- (CurrentFileName[outindex] != '\0'); outindex++) {
- agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
- }
- if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
- OUTPUT_OVERFLOW;
- free_buf(fd, buffer);
- return -1;
- }
- else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
- agrep_outpointer += outindex;
- }
-
- free_buf(fd, buffer);
- NEW_FILE = OFF;
- return 0;
- }
-
- print_end = i - D_length - 1;
- if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
- if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
- if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
- ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
- free_buf(fd, buffer);
- return 0; /* done */
- }
- }
- lasti = i - D_length;
- TRUNCATE = OFF;
- r2 = r3 = r1 = Init[0];
- r1 = r_Init1 & r3;
- r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
- if (DELIMITER) CurrentByteOffset += 1*D_length;
- else CurrentByteOffset += 1*1;
- }
- c = buffer[i++];
- CurrentByteOffset ++;
- CMask = Mask[c];
- r1 = r_Init1 & r2;
- r3 = (( r2 >> 1 ) & CMask) | r1;
- if ( r3 & endpos ) {
- j++;
- if (DELIMITER) CurrentByteOffset -= D_length;
- else CurrentByteOffset -= 1;
- if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE )
- {
- if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {
- num_of_matched++;
-
- if (agrep_finalfp != NULL)
- fprintf(agrep_finalfp, "%s\n", CurrentFileName);
- else {
- int outindex;
- for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
- (CurrentFileName[outindex] != '\0'); outindex++) {
- agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
- }
- if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
- OUTPUT_OVERFLOW;
- free_buf(fd, buffer);
- return -1;
- }
- else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
- agrep_outpointer += outindex;
- }
-
- free_buf(fd, buffer);
- NEW_FILE = OFF;
- return 0;
- }
-
- print_end = i - D_length - 1;
- if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) )
- if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;}
- if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
- ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
- free_buf(fd, buffer);
- return 0; /* done */
- }
- }
- lasti = i - D_length ;
- TRUNCATE = OFF;
- r2 = r3 = r1 = Init[0];
- r1 = r_Init1 & r2;
- r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask;
- if (DELIMITER) CurrentByteOffset += 1*D_length;
- else CurrentByteOffset += 1*1;
- }
- }
-
- return 0;
- }
- #endif /*AGREP_POINTER*/
- }
-
- fill_buf(fd, buf, record_size)
- int fd, record_size;
- unsigned char *buf;
- {
- int num_read=1;
- int total_read=0;
- extern int glimpse_clientdied;
-
- if (fd >= 0) {
- while(total_read < record_size && num_read > 0) {
- if (glimpse_clientdied) return 0;
- num_read = read(fd, buf+total_read, record_size - total_read);
- total_read = total_read + num_read;
- }
- }
- #if AGREP_POINTER
- else return 0; /* should not call this function if buffer is a pointer to a user-specified region! */
- #else /*AGREP_POINTER*/
- else { /* simulate a file */
- total_read = (record_size > (agrep_inlen - agrep_inpointer)) ? (agrep_len - agrep_inpointer) : record_size;
- memcpy(buf, agrep_inbuffer + agrep_inpointer, total_read);
- agrep_inpointer += total_read;
- /* printf("agrep_inpointer %d total_read %d\n", agrep_inpointer, total_read);*/
- }
- #endif /*AGREP_POINTER*/
- if (glimpse_clientdied) return 0;
- return(total_read);
- }
-
/*
 * In these functions no allocation or freeing is done when AGREP_POINTER
 * is set and fd == -1, i.e., when agrep is called to search within memory.
 */
-
/*
 * alloc_buf: allocate size bytes with malloc() and store the address
 * (NULL on failure) in *buf.  In an AGREP_POINTER build a call with
 * fd == -1 allocates nothing and leaves *buf as it was.
 * The old-style definition is kept to match the rest of the file.
 */
void
alloc_buf(fd, buf, size)
int fd;
char **buf;
int size;
{
	char *block;

#if AGREP_POINTER
	if (fd == -1) return;	/* in-memory search: no buffer of our own */
#endif /*AGREP_POINTER*/
	block = (char *)malloc(size);
	*buf = block;
}
-
/*
 * free_buf: release buf with free().  In an AGREP_POINTER build a call
 * with fd == -1 releases nothing.
 * The old-style definition is kept to match the rest of the file.
 */
void
free_buf(fd, buf)
int fd;
char *buf;
{
#if AGREP_POINTER
	if (fd == -1) return;	/* in-memory search: buf is not ours to free */
#endif /*AGREP_POINTER*/
	free(buf);
}
-